import os
import re

import requests
from lxml import etree

# Script: download one page of a Douban movie listing, extract the text of
# every title <span>, strip whitespace from each piece and print the result.

# Offset of the first list entry to request; substituted into the query string.
start = 0

# NOTE(review): the start=/filter= parameters look like Douban's Top 250
# pagination, which is normally served from /top250 -- confirm that /top is
# the intended path.
url = f"https://movie.douban.com/top?start={start}&filter="

# Browser-like headers sent with the request.
# NOTE(review): the Cookie value is a literal session cookie committed to
# source; consider loading it from the environment or a config file instead.
headers = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1 Edg/131.0.0.0',
    'Cookie':'viewed="27104959"; bid=NmOAnY-VFEI; _vwo_uuid_v2=DD541ECF8E6C9D1BAABB0B9E1A532F9D8|e6e248ef92443ee8c98beaa155914aad; ap_v=0,6.0; __utma=30149280.1601933346.1736256352.1736256352.1736771158.2; __utmb=30149280.0.10.1736771158; __utmc=30149280; __utmz=30149280.1736771158.2.2.utmcsr=localhost:63342|utmccn=(referral)|utmcmd=referral|utmcct=/',
    'Referer':'https://movie.douban.com/explore'
}

# requests never times out by default; bound the wait so a stalled connection
# cannot hang the script forever.
response = requests.get(url, headers=headers, timeout=10)

# Fail loudly on 4xx/5xx responses instead of silently parsing an error page
# and printing an empty list.
response.raise_for_status()

# bytes.decode() defaults to UTF-8.
data = response.content.decode()

tree = etree.HTML(data)

# Text nodes of every <span> directly under the <a> inside <div class="hd">.
title_list = tree.xpath('//div[@class="hd"]/a/span/text()')

# Strip every whitespace run from each extracted string. On str patterns `\s`
# is Unicode-aware, so non-breaking spaces are removed as well.
title_list = [re.sub(r'\s+', '', title) for title in title_list]
print(title_list)